################################################################################
### Elite Responses to Ethnic Diversity and Interethnic Contact              ###
### Data Analysis Preparation File                                           ###
### William O'Brochta                                                        ###
### Louisiana Tech University                                                ###
################################################################################


library(plyr)
library(dplyr)
library(lme4)
library(brms)
library(lmerTest)
library(lmtest)
library(MASS)
library(sandwich)
library(ordinal)
library(multiwayvcov)
library(texreg)
library(mediation)
library(psych)


survey<-read.csv("FinalDataCombined_June27.csv", header=T, stringsAsFactors = F)
caste<-read.csv("FinalCasteNames_June25.csv", header=T, stringsAsFactors = F)
call<-read.csv("MC_CallandNoCallSheet.csv", header=T, stringsAsFactors = F)


### Combine Caste Coding and Committee Membership for Measure of Committee Diversity
caste$LocationID<-ifelse(caste$q_10.10..State.where.the.corporator.lives=="UP",11,NA)
caste$LocationID<-ifelse(caste$q_10.10..State.where.the.corporator.lives=="Gujarat",22,caste$LocationID)
caste$LocationID<-ifelse(caste$q_10.10..State.where.the.corporator.lives=="West Bengal",33,caste$LocationID)
caste$LocationID<-ifelse(caste$q_10.10..State.where.the.corporator.lives=="Kerala",44,caste$LocationID)
caste$LocationID<-ifelse(caste$q_10.10..State.where.the.corporator.lives=="Karnataka",55,caste$LocationID)
caste$UniqueIDFinal<-paste0(caste$LocationID,caste$Unique.IDs)

#Make unique id coding adjustment
caste$UniqueIDFinal<-ifelse(caste$UniqueIDFinal==441166,441318,caste$UniqueIDFinal)

caste1<-merge(caste,call, by="UniqueIDFinal", all.x=T, all.y=T)
names(caste1)[names(caste1) == 'caste..Brahmin..other.forward..scheduled.caste..scheduled.tribe..other.backward.class..other.religion.'] <- 'caste_namecoded'


survey1<-survey
names(survey1)[names(survey1) == "unique_id"]<-"UniqueIDFinal"
names(survey1)[names(survey1) == "q_24"]<-"caste_self_report"
names(survey1)[names(survey1) == "q_24_other"]<-"caste_self_report_other"
survey1$caste_self_recoded<-ifelse(survey1$caste_self_report=="1", "Brahmin", NA)
survey1$caste_self_recoded<-ifelse(survey1$caste_self_report=="2", "OF", survey1$caste_self_recoded)
survey1$caste_self_recoded<-ifelse(survey1$caste_self_report=="3", "SC", survey1$caste_self_recoded)
survey1$caste_self_recoded<-ifelse(survey1$caste_self_report=="4", "ST", survey1$caste_self_recoded)
survey1$caste_self_recoded<-ifelse(survey1$caste_self_report=="5", "OBC", survey1$caste_self_recoded)
survey1$caste_self_recoded<-ifelse(survey1$caste_self_report=="6", "Other religion", survey1$caste_self_recoded)

survey1.1<-dplyr::select(survey1, c("UniqueIDFinal", "caste_self_recoded"))
survey1$q_24_other
caste2<-merge(caste1, survey1.1, by="UniqueIDFinal", all.x=T)
caste2$caste_namecoded<-ifelse(caste2$caste_namecoded=="OF ", "OF", caste2$caste_namecoded)




###Compare Caste Coding Methods
castecomparison<-as.data.frame(cbind(caste2$UniqueIDFinal, caste2$q_7.7..Name.of.corporator.x,
                                     caste2$q_6.6..Name.of.municipal.corporation.on.which.corporator.serves.x,
                                     caste2$q_10.10..State.where.the.corporator.lives.x,
                                     caste2$caste_namecoded,caste2$caste_self_recoded,
                                     caste2$coded_reservation,
                                     caste2$coded_surname, caste2$coded_fullname,
                                     caste2$coded_archival, caste2$coded_guess))
colnames(castecomparison)<-c("UniqueIDFinal", "Name", "Corporation", "State",
                             "NameCoded", "SelfCoded", "coded_reservation",
                             "coded_surname", "coded_fullname", "coded_archival", "coded_guess")

castecomparison2<-castecomparison[!is.na(castecomparison$SelfCoded),]
castecomparison2$coded_reservation<-as.numeric(as.character(castecomparison2$coded_reservation))
castecomparison2$coded_surname<-as.numeric(as.character(castecomparison2$coded_surname))
castecomparison2$coded_fullname<-as.numeric(as.character(castecomparison2$coded_fullname))
castecomparison2$coded_archival<-as.numeric(as.character(castecomparison2$coded_archival))
castecomparison2$coded_guess<-as.numeric(as.character(castecomparison2$coded_guess))

castecomparison3<-castecomparison2[castecomparison2$NameCoded!=castecomparison2$SelfCoded,]


#Table SI.5.2
sum(caste2$coded_reservation)
sum(caste2$coded_surname)
sum(caste2$coded_archival)
sum(caste2$coded_guess)
sum(caste2$coded_fullname)
colSums(castecomparison3[,7:11])


#Table SI.5.1
table(caste2$q_10.10..State.where.the.corporator.lives.x)
table(castecomparison3$State)


###Use coded caste as perception of caste
caste2$caste<-caste2$caste_namecoded

#Now count caste by committee based only on caste coding
caste2$Brahmin<-ifelse(caste2$caste=="Brahmin", 1, 0)
caste2$OBC<-ifelse(caste2$caste=="OBC", 1, 0)
caste2$OF<-ifelse(caste2$caste=="OF", 1, 0)
caste2$Other_Religion<-ifelse(caste2$caste=="Other religion", 1, 0)
caste2$SC<-ifelse(caste2$caste=="SC", 1, 0)
caste2$ST<-ifelse(caste2$caste=="ST", 1, 0)


committeecaste<-as.data.frame(matrix(nrow=145, ncol=7))
colnames(committeecaste)<-c("committee_name", "Brahmin", "OBC", "OF", "Other_Religion",
                            "SC", "ST")

for(i in 1:146){
  tempcaste<-caste2[caste2[,51+i]==1,]
  tempcaste<-tempcaste[rowSums(is.na(tempcaste)) != ncol(tempcaste), ]
  tempcaste2<-as.data.frame(colSums(tempcaste[,202:207]))
  committeecaste[i,1]<-colnames(tempcaste)[51+i]
  committeecaste[i,2]<-tempcaste2[1,1]
  committeecaste[i,3]<-tempcaste2[2,1]
  committeecaste[i,4]<-tempcaste2[3,1]
  committeecaste[i,5]<-tempcaste2[4,1]
  committeecaste[i,6]<-tempcaste2[5,1]
  committeecaste[i,7]<-tempcaste2[6,1]
}

committeecaste$size<-rowSums(committeecaste[,-1])
committeecaste$hh_index<-1-((committeecaste$Brahmin/committeecaste$size)^2+
                              (committeecaste$OF/committeecaste$size)^2+
                              (committeecaste$SC/committeecaste$size)^2+
                              (committeecaste$ST/committeecaste$size)^2+
                              (committeecaste$OBC/committeecaste$size)^2+
                              (committeecaste$Other_Religion/committeecaste$size)^2)


committeecaste$pct_forward<-(committeecaste$Brahmin+committeecaste$OF)/committeecaste$size
committeecaste$pct_notforward<-1-committeecaste$pct_forward  

colnames(committeecaste)<-c("committee_name", "cmte_Brahmin", "cmte_OBC", "cmte_OF",
                            "cmte_Other_Religion", "cmte_SC", "cmte_ST", "cmte_size",
                            "cmte_hh_index", "cmte_pct_forward", "cmte_pct_notforward")



###Now identify caste of each minister asked about in the survey

#Make a Unique ID-Caste df
caste3<-as.data.frame(cbind(caste2$UniqueIDFinal,caste2$caste,caste2[,202:207]))
colnames(caste3)<-c("UniqueIDFinal", "castecoded", "Brahmin",
                    "OBC", "OF", "Other_Religion",
                    "SC", "ST")

survey1<-merge(survey, caste3, by.x="unique_id", by.y="UniqueIDFinal")



#Merge on First Member Caste
colnames(caste3)<-c("UniqueIDFinal", "mem_no_castecoded_1", "mem_no_Brahmin_1",
                    "mem_no_OBC_1", "mem_no_OF_1", "mem_no_Other_Religion_1",
                    "mem_no_SC_1", "mem_no_ST_1")

survey_mem1<-merge(survey1, caste3, by.x="mem_no_ori_id_1", by.y="UniqueIDFinal", all.x=T)

#Merge on Second Member Caste
colnames(caste3)<-c("UniqueIDFinal", "mem_no_castecoded_2", "mem_no_Brahmin_2",
                    "mem_no_OBC_2", "mem_no_OF_2", "mem_no_Other_Religion_2",
                    "mem_no_SC_2", "mem_no_ST_2")

survey_mem2<-merge(survey_mem1, caste3, by.x="mem_no_ori_id_2", by.y="UniqueIDFinal", all.x=T)

#Merge on Third Member Caste
colnames(caste3)<-c("UniqueIDFinal", "mem_no_castecoded_3", "mem_no_Brahmin_3",
                    "mem_no_OBC_3", "mem_no_OF_3", "mem_no_Other_Religion_3",
                    "mem_no_SC_3", "mem_no_ST_3")

survey_mem3<-merge(survey_mem2, caste3, by.x="mem_no_ori_id_3", by.y="UniqueIDFinal", all.x=T)

#Merge on Fourth Member Caste
colnames(caste3)<-c("UniqueIDFinal", "mem_no_castecoded_4", "mem_no_Brahmin_4",
                    "mem_no_OBC_4", "mem_no_OF_4", "mem_no_Other_Religion_4",
                    "mem_no_SC_4", "mem_no_ST_4")

survey_mem4<-merge(survey_mem3, caste3, by.x="mem_no_ori_id_4", by.y="UniqueIDFinal", all.x=T)


#Now measure frequency of outgroup interaction
#First do it with caste categories
survey_mem4$mem_no_castematch_1<-ifelse(survey_mem4$castecoded==survey_mem4$mem_no_castecoded_1,1,0)
survey_mem4$mem_no_castematch_2<-ifelse(survey_mem4$castecoded==survey_mem4$mem_no_castecoded_2,1,0)
survey_mem4$mem_no_castematch_3<-ifelse(survey_mem4$castecoded==survey_mem4$mem_no_castecoded_3,1,0)
survey_mem4$mem_no_castematch_4<-ifelse(survey_mem4$castecoded==survey_mem4$mem_no_castecoded_4,1,0)

survey_mem4$mem_no_contact_1<-ifelse(survey_mem4$castecoded!=survey_mem4$mem_no_castecoded_1,survey_mem4$q_5_1,NA)
survey_mem4$mem_no_contact_2<-ifelse(survey_mem4$castecoded!=survey_mem4$mem_no_castecoded_2,survey_mem4$q_5_2,NA)
survey_mem4$mem_no_contact_3<-ifelse(survey_mem4$castecoded!=survey_mem4$mem_no_castecoded_3,survey_mem4$q_5_3,NA)
survey_mem4$mem_no_contact_4<-ifelse(survey_mem4$castecoded!=survey_mem4$mem_no_castecoded_4,survey_mem4$q_5_4,NA)

#Number of times a respondent was asked about contact minus number of caste matches
survey_mem4$mem_no_total<-survey_mem4$q_5_6_rep_count-rowSums(survey_mem4[,100:103], na.rm=T)

survey_mem4$contact_avg<-ifelse(survey_mem4$mem_no_total==1,rowSums(survey_mem4[104:107], na.rm=T)/5, NA)
survey_mem4$contact_avg<-ifelse(survey_mem4$mem_no_total==2,rowSums(survey_mem4[104:107], na.rm=T)/10, survey_mem4$contact_avg)
survey_mem4$contact_avg<-ifelse(survey_mem4$mem_no_total==3,rowSums(survey_mem4[104:107], na.rm=T)/15, survey_mem4$contact_avg)
survey_mem4$contact_avg<-ifelse(survey_mem4$mem_no_total==4,rowSums(survey_mem4[104:107], na.rm=T)/20, survey_mem4$contact_avg)


#Now with co-caste members
survey_mem4$mem_no_in_contact_1<-ifelse(survey_mem4$castecoded==survey_mem4$mem_no_castecoded_1,survey_mem4$q_5_1,NA)
survey_mem4$mem_no_in_contact_2<-ifelse(survey_mem4$castecoded==survey_mem4$mem_no_castecoded_2,survey_mem4$q_5_2,NA)
survey_mem4$mem_no_in_contact_3<-ifelse(survey_mem4$castecoded==survey_mem4$mem_no_castecoded_3,survey_mem4$q_5_3,NA)
survey_mem4$mem_no_in_contact_4<-ifelse(survey_mem4$castecoded==survey_mem4$mem_no_castecoded_4,survey_mem4$q_5_4,NA)

survey_mem4$mem_no_in_total<-survey_mem4$q_5_6_rep_count-survey_mem4$mem_no_total

survey_mem4$contact_in_avg<-ifelse(survey_mem4$mem_no_in_total==1,rowSums(survey_mem4[110:113], na.rm=T)/5, NA)
survey_mem4$contact_in_avg<-ifelse(survey_mem4$mem_no_in_total==2,rowSums(survey_mem4[110:113], na.rm=T)/10, survey_mem4$contact_in_avg)
survey_mem4$contact_in_avg<-ifelse(survey_mem4$mem_no_in_total==3,rowSums(survey_mem4[110:113], na.rm=T)/15, survey_mem4$contact_in_avg)
survey_mem4$contact_in_avg<-ifelse(survey_mem4$mem_no_in_total==4,rowSums(survey_mem4[110:113], na.rm=T)/20, survey_mem4$contact_in_avg)


#Now do it with forward caste groups
survey_mem4$forwardcoded<-ifelse(survey_mem4$castecoded=="Brahmin"|survey_mem4$castecoded=="OF",1,0)
survey_mem4$mem_no_forwardcoded_1<-ifelse(survey_mem4$mem_no_castecoded_1=="Brahmin"|survey_mem4$mem_no_castecoded_1=="OF",1,0)
survey_mem4$mem_no_forwardcoded_2<-ifelse(survey_mem4$mem_no_castecoded_2=="Brahmin"|survey_mem4$mem_no_castecoded_2=="OF",1,0)
survey_mem4$mem_no_forwardcoded_3<-ifelse(survey_mem4$mem_no_castecoded_3=="Brahmin"|survey_mem4$mem_no_castecoded_3=="OF",1,0)
survey_mem4$mem_no_forwardcoded_4<-ifelse(survey_mem4$mem_no_castecoded_4=="Brahmin"|survey_mem4$mem_no_castecoded_4=="OF",1,0)

survey_mem4$mem_no_forwardmatch_1<-ifelse(survey_mem4$forwardcoded==survey_mem4$mem_no_forwardcoded_1,1,0)
survey_mem4$mem_no_forwardmatch_2<-ifelse(survey_mem4$forwardcoded==survey_mem4$mem_no_forwardcoded_2,1,0)
survey_mem4$mem_no_forwardmatch_3<-ifelse(survey_mem4$forwardcoded==survey_mem4$mem_no_forwardcoded_3,1,0)
survey_mem4$mem_no_forwardmatch_4<-ifelse(survey_mem4$forwardcoded==survey_mem4$mem_no_forwardcoded_4,1,0)

survey_mem4$mem_no_contactf_1<-ifelse(survey_mem4$forwardcoded!=survey_mem4$mem_no_forwardcoded_1,survey_mem4$q_5_1,NA)
survey_mem4$mem_no_contactf_2<-ifelse(survey_mem4$forwardcoded!=survey_mem4$mem_no_forwardcoded_2,survey_mem4$q_5_2,NA)
survey_mem4$mem_no_contactf_3<-ifelse(survey_mem4$forwardcoded!=survey_mem4$mem_no_forwardcoded_3,survey_mem4$q_5_3,NA)
survey_mem4$mem_no_contactf_4<-ifelse(survey_mem4$forwardcoded!=survey_mem4$mem_no_forwardcoded_4,survey_mem4$q_5_4,NA)

survey_mem4$mem_no_totalf<-survey_mem4$q_5_6_rep_count-rowSums(survey_mem4[,121:124], na.rm=T)


survey_mem4$contactf_avg<-ifelse(survey_mem4$mem_no_totalf==1,rowSums(survey_mem4[125:128], na.rm=T)/5, NA)
survey_mem4$contactf_avg<-ifelse(survey_mem4$mem_no_totalf==2,rowSums(survey_mem4[125:128], na.rm=T)/10, survey_mem4$contactf_avg)
survey_mem4$contactf_avg<-ifelse(survey_mem4$mem_no_totalf==3,rowSums(survey_mem4[125:128], na.rm=T)/15, survey_mem4$contactf_avg)
survey_mem4$contactf_avg<-ifelse(survey_mem4$mem_no_totalf==4,rowSums(survey_mem4[125:128], na.rm=T)/20, survey_mem4$contactf_avg)


#Ingroup forward/backward contact
survey_mem4$mem_no_in_contactf_1<-ifelse(survey_mem4$forwardcoded==survey_mem4$mem_no_forwardcoded_1,survey_mem4$q_5_1,NA)
survey_mem4$mem_no_in_contactf_2<-ifelse(survey_mem4$forwardcoded==survey_mem4$mem_no_forwardcoded_2,survey_mem4$q_5_2,NA)
survey_mem4$mem_no_in_contactf_3<-ifelse(survey_mem4$forwardcoded==survey_mem4$mem_no_forwardcoded_3,survey_mem4$q_5_3,NA)
survey_mem4$mem_no_in_contactf_4<-ifelse(survey_mem4$forwardcoded==survey_mem4$mem_no_forwardcoded_4,survey_mem4$q_5_4,NA)

survey_mem4$mem_no_in_totalf<-survey_mem4$q_5_6_rep_count-survey_mem4$mem_no_totalf


survey_mem4$contactf_in_avg<-ifelse(survey_mem4$mem_no_in_totalf==1,rowSums(survey_mem4[131:134], na.rm=T)/5, NA)
survey_mem4$contactf_in_avg<-ifelse(survey_mem4$mem_no_in_totalf==2,rowSums(survey_mem4[131:134], na.rm=T)/10, survey_mem4$contactf_in_avg)
survey_mem4$contactf_in_avg<-ifelse(survey_mem4$mem_no_in_totalf==3,rowSums(survey_mem4[131:134], na.rm=T)/15, survey_mem4$contactf_in_avg)
survey_mem4$contactf_in_avg<-ifelse(survey_mem4$mem_no_in_totalf==4,rowSums(survey_mem4[131:134], na.rm=T)/20, survey_mem4$contactf_in_avg)

survey_mem5<-dplyr::select(survey_mem4, c("unique_id", "castecoded", "Brahmin", "OBC",
                                          "OF", "Other_Religion", "SC", "ST",
                                          "contact_avg", "contactf_avg", "contact_in_avg", "contactf_in_avg"))




#Table SI.3.3
###Compare caste of members surveyed to members asked about on committees
#Members asked about

#Actual call sheet respondents
table(survey_mem4$castecoded)/sum(table(survey_mem4$castecoded))

t.test(c(survey_mem4$mem_no_Brahmin_1, survey_mem4$mem_no_Brahmin_2,
         survey_mem4$mem_no_Brahmin_3, survey_mem4$mem_no_Brahmin_4), 
       survey_mem4$Brahmin)

t.test(c(survey_mem4$mem_no_OBC_1, survey_mem4$mem_no_OBC_2,
         survey_mem4$mem_no_OBC_3, survey_mem4$mem_no_OBC_4), 
       survey_mem4$OBC)


t.test(c(survey_mem4$mem_no_OF_1, survey_mem4$mem_no_OF_2,
         survey_mem4$mem_no_OF_3, survey_mem4$mem_no_OF_4), 
       survey_mem4$OF)


t.test(c(survey_mem4$mem_no_Other_Religion_1, survey_mem4$mem_no_Other_Religion_2,
         survey_mem4$mem_no_Other_Religion_3, survey_mem4$mem_no_Other_Religion_4), 
       survey_mem4$Other_Religion)

t.test(c(survey_mem4$mem_no_SC_1, survey_mem4$mem_no_SC_2,
         survey_mem4$mem_no_SC_3, survey_mem4$mem_no_SC_4), 
       survey_mem4$SC)


t.test(c(survey_mem4$mem_no_ST_1, survey_mem4$mem_no_ST_2,
         survey_mem4$mem_no_ST_3, survey_mem4$mem_no_ST_4), 
       survey_mem4$ST)

#Compare call sheet selected to not selected
called<-caste2[caste2$block_assignment==1,]
notcalled<-caste2[caste2$block_assignment==0,]

t.test(c(survey_mem4$mem_no_Brahmin_1, survey_mem4$mem_no_Brahmin_2,
         survey_mem4$mem_no_Brahmin_3, survey_mem4$mem_no_Brahmin_4), 
       called$Brahmin)

t.test(c(survey_mem4$mem_no_OBC_1, survey_mem4$mem_no_OBC_2,
         survey_mem4$mem_no_OBC_3, survey_mem4$mem_no_OBC_4), 
       called$OBC)


t.test(c(survey_mem4$mem_no_OF_1, survey_mem4$mem_no_OF_2,
         survey_mem4$mem_no_OF_3, survey_mem4$mem_no_OF_4), 
       called$OF)


t.test(c(survey_mem4$mem_no_Other_Religion_1, survey_mem4$mem_no_Other_Religion_2,
         survey_mem4$mem_no_Other_Religion_3, survey_mem4$mem_no_Other_Religion_4), 
       called$Other_Religion)

t.test(c(survey_mem4$mem_no_SC_1, survey_mem4$mem_no_SC_2,
         survey_mem4$mem_no_SC_3, survey_mem4$mem_no_SC_4), 
       called$SC)


t.test(c(survey_mem4$mem_no_ST_1, survey_mem4$mem_no_ST_2,
         survey_mem4$mem_no_ST_3, survey_mem4$mem_no_ST_4), 
       called$ST)

#Not called
t.test(c(survey_mem4$mem_no_Brahmin_1, survey_mem4$mem_no_Brahmin_2,
         survey_mem4$mem_no_Brahmin_3, survey_mem4$mem_no_Brahmin_4), 
       notcalled$Brahmin)

t.test(c(survey_mem4$mem_no_OBC_1, survey_mem4$mem_no_OBC_2,
         survey_mem4$mem_no_OBC_3, survey_mem4$mem_no_OBC_4), 
       notcalled$OBC)


t.test(c(survey_mem4$mem_no_OF_1, survey_mem4$mem_no_OF_2,
         survey_mem4$mem_no_OF_3, survey_mem4$mem_no_OF_4), 
       notcalled$OF)


t.test(c(survey_mem4$mem_no_Other_Religion_1, survey_mem4$mem_no_Other_Religion_2,
         survey_mem4$mem_no_Other_Religion_3, survey_mem4$mem_no_Other_Religion_4), 
       notcalled$Other_Religion)

t.test(c(survey_mem4$mem_no_SC_1, survey_mem4$mem_no_SC_2,
         survey_mem4$mem_no_SC_3, survey_mem4$mem_no_SC_4), 
       notcalled$SC)


t.test(c(survey_mem4$mem_no_ST_1, survey_mem4$mem_no_ST_2,
         survey_mem4$mem_no_ST_3, survey_mem4$mem_no_ST_4), 
       notcalled$ST)







###Repeat coding with merging self and namecoded caste together
#Merge self and namecoded caste together
caste2$caste<-caste2$caste_self_recoded
caste2$caste<-ifelse(is.na(caste2$caste), caste2$caste_namecoded, caste2$caste)


#Now count caste by committee based only on caste coding
caste2$Brahmin<-ifelse(caste2$caste=="Brahmin", 1, 0)
caste2$OBC<-ifelse(caste2$caste=="OBC", 1, 0)
caste2$OF<-ifelse(caste2$caste=="OF", 1, 0)
caste2$Other_Religion<-ifelse(caste2$caste=="Other religion", 1, 0)
caste2$SC<-ifelse(caste2$caste=="SC", 1, 0)
caste2$ST<-ifelse(caste2$caste=="ST", 1, 0)


committeecaste_sr<-as.data.frame(matrix(nrow=145, ncol=7))
colnames(committeecaste_sr)<-c("committee_name", "Brahmin", "OBC", "OF", "Other_Religion",
                               "SC", "ST")

for(i in 1:146){
  tempcaste<-caste2[caste2[,51+i]==1,]
  tempcaste<-tempcaste[rowSums(is.na(tempcaste)) != ncol(tempcaste), ]
  tempcaste2<-as.data.frame(colSums(tempcaste[,202:207]))
  committeecaste_sr[i,1]<-colnames(tempcaste)[51+i]
  committeecaste_sr[i,2]<-tempcaste2[1,1]
  committeecaste_sr[i,3]<-tempcaste2[2,1]
  committeecaste_sr[i,4]<-tempcaste2[3,1]
  committeecaste_sr[i,5]<-tempcaste2[4,1]
  committeecaste_sr[i,6]<-tempcaste2[5,1]
  committeecaste_sr[i,7]<-tempcaste2[6,1]
}

committeecaste_sr$size<-rowSums(committeecaste_sr[,-1])
committeecaste_sr$hh_index_sr<-1-((committeecaste_sr$Brahmin/committeecaste_sr$size)^2+
                                    (committeecaste_sr$OF/committeecaste_sr$size)^2+
                                    (committeecaste_sr$SC/committeecaste_sr$size)^2+
                                    (committeecaste_sr$ST/committeecaste_sr$size)^2+
                                    (committeecaste_sr$OBC/committeecaste_sr$size)^2+
                                    (committeecaste_sr$Other_Religion/committeecaste_sr$size)^2)


committeecaste_sr$pct_forward_sr<-(committeecaste_sr$Brahmin+committeecaste_sr$OF)/committeecaste_sr$size
committeecaste_sr$pct_notforward_sr<-1-committeecaste_sr$pct_forward  


committeecaste_sr<-committeecaste_sr[,-8]
colnames(committeecaste_sr)<-c("committee_name", "cmte_Brahmin_sr", "cmte_OBC_sr", "cmte_OF_sr",
                               "cmte_Other_Religion_sr", "cmte_SC_sr", "cmte_ST_sr",
                               "cmte_hh_index_sr", "cmte_pct_forward_sr", "cmte_pct_notforward_sr")



###Now identify caste of each minister asked about in the survey

#Make a Unique ID-Caste df
caste3<-as.data.frame(cbind(caste2$UniqueIDFinal,caste2$caste,caste2[,202:207]))
colnames(caste3)<-c("UniqueIDFinal", "castecoded", "Brahmin",
                    "OBC", "OF", "Other_Religion",
                    "SC", "ST")

survey1<-merge(survey, caste3, by.x="unique_id", by.y="UniqueIDFinal")



#Merge on First Member Caste
colnames(caste3)<-c("UniqueIDFinal", "mem_no_castecoded_1", "mem_no_Brahmin_1",
                    "mem_no_OBC_1", "mem_no_OF_1", "mem_no_Other_Religion_1",
                    "mem_no_SC_1", "mem_no_ST_1")

survey_mem1<-merge(survey1, caste3, by.x="mem_no_ori_id_1", by.y="UniqueIDFinal", all.x=T)

#Merge on Second Member Caste
colnames(caste3)<-c("UniqueIDFinal", "mem_no_castecoded_2", "mem_no_Brahmin_2",
                    "mem_no_OBC_2", "mem_no_OF_2", "mem_no_Other_Religion_2",
                    "mem_no_SC_2", "mem_no_ST_2")

survey_mem2<-merge(survey_mem1, caste3, by.x="mem_no_ori_id_2", by.y="UniqueIDFinal", all.x=T)

#Merge on Third Member Caste
colnames(caste3)<-c("UniqueIDFinal", "mem_no_castecoded_3", "mem_no_Brahmin_3",
                    "mem_no_OBC_3", "mem_no_OF_3", "mem_no_Other_Religion_3",
                    "mem_no_SC_3", "mem_no_ST_3")

survey_mem3<-merge(survey_mem2, caste3, by.x="mem_no_ori_id_3", by.y="UniqueIDFinal", all.x=T)

#Merge on Fourth Member Caste
colnames(caste3)<-c("UniqueIDFinal", "mem_no_castecoded_4", "mem_no_Brahmin_4",
                    "mem_no_OBC_4", "mem_no_OF_4", "mem_no_Other_Religion_4",
                    "mem_no_SC_4", "mem_no_ST_4")

survey_mem4<-merge(survey_mem3, caste3, by.x="mem_no_ori_id_4", by.y="UniqueIDFinal", all.x=T)


#Now measure frequency of outgroup interaction
#First do it with caste categories
survey_mem4$mem_no_castematch_1<-ifelse(survey_mem4$castecoded==survey_mem4$mem_no_castecoded_1,1,0)
survey_mem4$mem_no_castematch_2<-ifelse(survey_mem4$castecoded==survey_mem4$mem_no_castecoded_2,1,0)
survey_mem4$mem_no_castematch_3<-ifelse(survey_mem4$castecoded==survey_mem4$mem_no_castecoded_3,1,0)
survey_mem4$mem_no_castematch_4<-ifelse(survey_mem4$castecoded==survey_mem4$mem_no_castecoded_4,1,0)

survey_mem4$mem_no_contact_1<-ifelse(survey_mem4$castecoded!=survey_mem4$mem_no_castecoded_1,survey_mem4$q_5_1,NA)
survey_mem4$mem_no_contact_2<-ifelse(survey_mem4$castecoded!=survey_mem4$mem_no_castecoded_2,survey_mem4$q_5_2,NA)
survey_mem4$mem_no_contact_3<-ifelse(survey_mem4$castecoded!=survey_mem4$mem_no_castecoded_3,survey_mem4$q_5_3,NA)
survey_mem4$mem_no_contact_4<-ifelse(survey_mem4$castecoded!=survey_mem4$mem_no_castecoded_4,survey_mem4$q_5_4,NA)

#Number of times a respondent was asked about contact minus number of caste matches
survey_mem4$mem_no_total<-survey_mem4$q_5_6_rep_count-rowSums(survey_mem4[,100:103], na.rm=T)

survey_mem4$contact_avg_sr<-ifelse(survey_mem4$mem_no_total==1,rowSums(survey_mem4[104:107], na.rm=T)/5, NA)
survey_mem4$contact_avg_sr<-ifelse(survey_mem4$mem_no_total==2,rowSums(survey_mem4[104:107], na.rm=T)/10, survey_mem4$contact_avg)
survey_mem4$contact_avg_sr<-ifelse(survey_mem4$mem_no_total==3,rowSums(survey_mem4[104:107], na.rm=T)/15, survey_mem4$contact_avg)
survey_mem4$contact_avg_sr<-ifelse(survey_mem4$mem_no_total==4,rowSums(survey_mem4[104:107], na.rm=T)/20, survey_mem4$contact_avg)



#Now do it with forward caste groups
survey_mem4$forwardcoded<-ifelse(survey_mem4$castecoded=="Brahmin"|survey_mem4$castecoded=="OF",1,0)
survey_mem4$mem_no_forwardcoded_1<-ifelse(survey_mem4$mem_no_castecoded_1=="Brahmin"|survey_mem4$mem_no_castecoded_1=="OF",1,0)
survey_mem4$mem_no_forwardcoded_2<-ifelse(survey_mem4$mem_no_castecoded_2=="Brahmin"|survey_mem4$mem_no_castecoded_2=="OF",1,0)
survey_mem4$mem_no_forwardcoded_3<-ifelse(survey_mem4$mem_no_castecoded_3=="Brahmin"|survey_mem4$mem_no_castecoded_3=="OF",1,0)
survey_mem4$mem_no_forwardcoded_4<-ifelse(survey_mem4$mem_no_castecoded_4=="Brahmin"|survey_mem4$mem_no_castecoded_4=="OF",1,0)

survey_mem4$mem_no_forwardmatch_1<-ifelse(survey_mem4$forwardcoded==survey_mem4$mem_no_forwardcoded_1,1,0)
survey_mem4$mem_no_forwardmatch_2<-ifelse(survey_mem4$forwardcoded==survey_mem4$mem_no_forwardcoded_2,1,0)
survey_mem4$mem_no_forwardmatch_3<-ifelse(survey_mem4$forwardcoded==survey_mem4$mem_no_forwardcoded_3,1,0)
survey_mem4$mem_no_forwardmatch_4<-ifelse(survey_mem4$forwardcoded==survey_mem4$mem_no_forwardcoded_4,1,0)

survey_mem4$mem_no_contactf_1<-ifelse(survey_mem4$forwardcoded!=survey_mem4$mem_no_forwardcoded_1,survey_mem4$q_5_1,NA)
survey_mem4$mem_no_contactf_2<-ifelse(survey_mem4$forwardcoded!=survey_mem4$mem_no_forwardcoded_2,survey_mem4$q_5_2,NA)
survey_mem4$mem_no_contactf_3<-ifelse(survey_mem4$forwardcoded!=survey_mem4$mem_no_forwardcoded_3,survey_mem4$q_5_3,NA)
survey_mem4$mem_no_contactf_4<-ifelse(survey_mem4$forwardcoded!=survey_mem4$mem_no_forwardcoded_4,survey_mem4$q_5_4,NA)

survey_mem4$mem_no_totalf<-survey_mem4$q_5_6_rep_count-rowSums(survey_mem4[,115:118], na.rm=T)

survey_mem4$contactf_avg_sr<-ifelse(survey_mem4$mem_no_totalf==1,rowSums(survey_mem4[119:122], na.rm=T)/5, NA)
survey_mem4$contactf_avg_sr<-ifelse(survey_mem4$mem_no_totalf==2,rowSums(survey_mem4[119:122], na.rm=T)/10, survey_mem4$contactf_avg)
survey_mem4$contactf_avg_sr<-ifelse(survey_mem4$mem_no_totalf==3,rowSums(survey_mem4[119:122], na.rm=T)/15, survey_mem4$contactf_avg)
survey_mem4$contactf_avg_sr<-ifelse(survey_mem4$mem_no_totalf==4,rowSums(survey_mem4[119:122], na.rm=T)/20, survey_mem4$contactf_avg)


survey_mem6<-dplyr::select(survey_mem4, c("unique_id",
                                          "contact_avg_sr", "contactf_avg_sr"))



#Percentage of women on committee
names(caste2)[names(caste2) == "q_8.8..Gender.of.corporator.x"]<-"gender"
caste2$female<-ifelse(caste2$gender==2,1,0)
caste2$female<-as.numeric(as.character(caste2$female))

committeegender<-as.data.frame(matrix(nrow=145, ncol=3))
colnames(committeegender)<-c("committee_name", "num_female", "pct_female")
caste2$counter<-1

for(i in 1:146){
  tempgender<-caste2[caste2[,51+i]==1,]
  tempgender<-tempgender[rowSums(is.na(tempgender))!=ncol(tempgender), ]
  committeegender[i,1]<-colnames(tempgender)[51+i]
  committeegender[i,2]<-sum(tempgender$female)
  committeegender[i,3]<-sum(tempgender$female)/sum(tempgender$counter)
}


#Now go to contact
gender<-as.data.frame(cbind(caste2$UniqueIDFinal, caste2$female))
gender$V2<-as.numeric(as.character(gender$V2))
colnames(gender)<-c('UniqueIDFinal', "female")


survey1<-merge(survey, gender, by.x="unique_id", by.y="UniqueIDFinal")

#Merge on First Member gender
colnames(gender)<-c("UniqueIDFinal", "mem_no_female_1")

survey_mem1<-merge(survey1, gender, by.x="mem_no_ori_id_1", by.y="UniqueIDFinal", all.x=T)

#Merge on Second Member gender
colnames(gender)<-c("UniqueIDFinal", "mem_no_female_2")

survey_mem2<-merge(survey_mem1, gender, by.x="mem_no_ori_id_2", by.y="UniqueIDFinal", all.x=T)

#Merge on Third Member gender
colnames(gender)<-c("UniqueIDFinal", "mem_no_female_3")

survey_mem3<-merge(survey_mem2, gender, by.x="mem_no_ori_id_3", by.y="UniqueIDFinal", all.x=T)

#Merge on Fourth Member gender
colnames(gender)<-c("UniqueIDFinal", "mem_no_female_4")

survey_mem4<-merge(survey_mem3, gender, by.x="mem_no_ori_id_4", by.y="UniqueIDFinal", all.x=T)

survey_mem4$mem_no_gendermatch_1<-ifelse(survey_mem4$female==survey_mem4$mem_no_female_1,1,0)
survey_mem4$mem_no_gendermatch_2<-ifelse(survey_mem4$female==survey_mem4$mem_no_female_2,1,0)
survey_mem4$mem_no_gendermatch_3<-ifelse(survey_mem4$female==survey_mem4$mem_no_female_3,1,0)
survey_mem4$mem_no_gendermatch_4<-ifelse(survey_mem4$female==survey_mem4$mem_no_female_4,1,0)

survey_mem4$mem_no_gendercontact_1<-ifelse(survey_mem4$female!=survey_mem4$mem_no_female_1,survey_mem4$q_5_1,NA)
survey_mem4$mem_no_gendercontact_2<-ifelse(survey_mem4$female!=survey_mem4$mem_no_female_2,survey_mem4$q_5_2,NA)
survey_mem4$mem_no_gendercontact_3<-ifelse(survey_mem4$female!=survey_mem4$mem_no_female_3,survey_mem4$q_5_3,NA)
survey_mem4$mem_no_gendercontact_4<-ifelse(survey_mem4$female!=survey_mem4$mem_no_female_4,survey_mem4$q_5_4,NA)

survey_mem4$mem_no_total<-survey_mem4$q_5_6_rep_count-rowSums(survey_mem4[,70:73], na.rm=T)

survey_mem4$contact_gender<-ifelse(survey_mem4$mem_no_total==1,rowSums(survey_mem4[74:77], na.rm=T)/5, NA)
survey_mem4$contact_gender<-ifelse(survey_mem4$mem_no_total==2,rowSums(survey_mem4[74:77], na.rm=T)/10, survey_mem4$contact_gender)
survey_mem4$contact_gender<-ifelse(survey_mem4$mem_no_total==3,rowSums(survey_mem4[74:77], na.rm=T)/15, survey_mem4$contact_gender)
survey_mem4$contact_gender<-ifelse(survey_mem4$mem_no_total==4,rowSums(survey_mem4[74:77], na.rm=T)/20, survey_mem4$contact_gender)

#Now for in-gender
survey_mem4$mem_no_in_gendercontact_1<-ifelse(survey_mem4$female==survey_mem4$mem_no_female_1,survey_mem4$q_5_1,NA)
survey_mem4$mem_no_in_gendercontact_2<-ifelse(survey_mem4$female==survey_mem4$mem_no_female_2,survey_mem4$q_5_2,NA)
survey_mem4$mem_no_in_gendercontact_3<-ifelse(survey_mem4$female==survey_mem4$mem_no_female_3,survey_mem4$q_5_3,NA)
survey_mem4$mem_no_in_gendercontact_4<-ifelse(survey_mem4$female==survey_mem4$mem_no_female_4,survey_mem4$q_5_4,NA)

survey_mem4$mem_no_in_total<-survey_mem4$q_5_6_rep_count-survey_mem4$mem_no_total

survey_mem4$contact_in_gender<-ifelse(survey_mem4$mem_no_in_total==1,rowSums(survey_mem4[80:83], na.rm=T)/5, NA)
survey_mem4$contact_in_gender<-ifelse(survey_mem4$mem_no_in_total==2,rowSums(survey_mem4[80:83], na.rm=T)/10, survey_mem4$contact_in_gender)
survey_mem4$contact_in_gender<-ifelse(survey_mem4$mem_no_in_total==3,rowSums(survey_mem4[80:83], na.rm=T)/15, survey_mem4$contact_in_gender)
survey_mem4$contact_in_gender<-ifelse(survey_mem4$mem_no_in_total==4,rowSums(survey_mem4[80:83], na.rm=T)/20, survey_mem4$contact_in_gender)


survey_mem7<-dplyr::select(survey_mem4, c("unique_id",
                                          "contact_gender", "contact_in_gender"))






###Survey preparation
call$callorder<-row.names(call)
names(call)[names(call) == "q_6.6..Name.of.municipal.corporation.on.which.corporator.serves"]<-"corporation"
names(call)[names(call) == "q_8.8..Gender.of.corporator"]<-"gender"
names(call)[names(call) == "q_9.9..Corporator.constituency..Reserved.for.SC.ST.or.general."]<-"anyreservation"
names(call)[names(call) == "q_9_resvd...Reserved"]<-"reservationtype"
names(call)[names(call) == "q_11.11..Constituency.the.corporator.represents"]<-"constituency_name"
names(call)[names(call) == "q_12.12..Number.of.committees.served.by.the.corporator"]<-"number_cmtes_on"
names(call)[names(call) == "q_14.14..Corporator.political.party"]<-"political_party"
names(call)[names(call) == "q_16.16..When.was.the.corporator.first.elected"]<-"year_elected"
names(call)[names(call) == "q_17.17..Number.of.times.elected"]<-"times_elected"
names(call)[names(call) == "q_19.19..Number.of.committees.in.the.corporation"]<-"corporation_cmtes"
names(call)[names(call) == "Wave2_AskAbout"]<-"committee_asked_about"
names(call)[names(call) == "Wave2_CasteReserved"]<-"caste_reservation"

call2<-dplyr::select(call, c("UniqueIDFinal", "corporation", "gender", "anyreservation",
                             "reservationtype", "constituency_name", "number_cmtes_on",
                             "political_party", "times_elected", "corporation_cmtes",
                             "committee_asked_about", "caste_reservation", "block_assignment",
                             "callorder"))


survey2<-survey
survey2<-survey2[,-c(4,7:9)]
survey2[,5]<-as.numeric(survey2[,5])
survey2$year_elected<-rowSums(survey2[,c(5,11)], na.rm=T)
survey2$year_elected<-ifelse(survey2$year_elected==0,NA,survey2$year_elected)
survey2<-survey2[,-c(5,11:12)]

names(survey2)[names(survey2) == "state_name_cal"]<-"state"
names(survey2)[names(survey2) == "q_1"]<-"agree_participate"
names(survey2)[names(survey2) == "q_6"]<-"cmte_interaction_pos"
survey2$treated<-ifelse(survey2$treat_con_cal==2,1,0)

names(survey2)[names(survey2) == "q_9"]<-"cmte_spend_wisely"
names(survey2)[names(survey2) == "q_10"]<-"cmte_opinions"
names(survey2)[names(survey2) == "q_11"]<-"cmte_validconcerns"
names(survey2)[names(survey2) == "q_12"]<-"cmte_onegroup"

names(survey2)[names(survey2) == "q_13"]<-"enthusiastic"
names(survey2)[names(survey2) == "q_14"]<-"angry"
names(survey2)[names(survey2) == "q_15"]<-"hopeful"
names(survey2)[names(survey2) == "q_16"]<-"resentful"

names(survey2)[names(survey2) == "q_17"]<-"donation_caste"
names(survey2)[names(survey2) == "q_18"]<-"cmte_castetrust"
survey2$neighbor_comfort<-6-survey2$q_19
names(survey2)[names(survey2) == "q_20"]<-"talk_comfort"

names(survey2)[names(survey2) == "q_21"]<-"age"
names(survey2)[names(survey2) == "q_22"]<-"socialmedia"
names(survey2)[names(survey2) == "q_23"]<-"educationyears"
names(survey2)[names(survey2) == "q_24"]<-"caste_self_report"
names(survey2)[names(survey2) == "q_24_other"]<-"caste_self_report_other"
names(survey2)[names(survey2) == "q_25"]<-"occupation_reported"
names(survey2)[names(survey2) == "q_26"]<-"times_called"
names(survey2)[names(survey2) == "q_27"]<-"interview_language"


survey3<-merge(survey2, survey_mem5, by="unique_id")
survey3.1.0<-merge(survey3, survey_mem6, by="unique_id")
survey3.1.1<-merge(survey3.1.0, survey_mem7, by="unique_id")
survey3.1<-merge(survey3.1.1, call2, by.x="unique_id", by.y="UniqueIDFinal")
survey3.2.0<-merge(survey3.1, committeecaste, by.x="committee_asked_about", by.y="committee_name")
survey3.2.1<-merge(survey3.2.0, committeegender, by.x="committee_asked_about", by.y="committee_name")
survey3.2<-merge(survey3.2.1, committeecaste_sr, by.x="committee_asked_about", by.y="committee_name")


survey3.2$socialmedia_active<-ifelse(survey3.2$socialmedia==2,1,0)
survey3.2$female<-ifelse(survey3.2$gender==2,1,0)
survey3.2$BJP<-ifelse(survey3.2$political_party=="Bjp"|survey3.2$political_party=="BJP"|survey3.2$political_party=="B j p",1,0)
survey3.2$INC<-ifelse(survey3.2$political_party=="Congree"|survey3.2$political_party=="Congress"|
                        survey3.2$political_party=="INC"|survey3.2$political_party=="Inc",1,0)
survey3.2$cmte_interaction_pos01<-ifelse(survey3.2$cmte_interaction_pos==2,1,0)
survey3.2$edu_10under<-ifelse(survey3.2$educationyears=="5"|survey3.2$educationyears=="10",1,0)
survey3.2$edu_bachelors<-ifelse(survey3.2$educationyears=="15",1,0)
survey3.2$edu_above<-ifelse(survey3.2$educationyears=="16",1,0)
survey3.2$edu_high<-ifelse(survey3.2$edu_10under==0,1,0)

survey3.2$Brahmin_sr<-ifelse(survey3.2$caste_self_report=="1",1,0)
survey3.2$OF_sr<-ifelse(survey3.2$caste_self_report=="2",1,0)
survey3.2$SC_sr<-ifelse(survey3.2$caste_self_report=="3",1,0)
survey3.2$ST_sr<-ifelse(survey3.2$caste_self_report=="4",1,0)
survey3.2$OBC_sr<-ifelse(survey3.2$caste_self_report=="5",1,0)
survey3.2$Other_Religion_sr<-ifelse(survey3.2$caste_self_report=="6",1,0)
survey3.2$caste_self_report<-as.factor(survey3.2$caste_self_report)

survey3.2$interview_language<-ifelse(survey3.2$interview_language=="Bangali","Bengali",
                                     survey3.2$interview_language)
survey3.2$interview_language<-ifelse(survey3.2$interview_language=="Gujrati hindi","Gujarati Hindi",
                                     survey3.2$interview_language)
survey3.2$interview_language<-ifelse(survey3.2$interview_language=="Gujrati Hindi","Gujarati Hindi",
                                     survey3.2$interview_language)

survey3.2$years_office<-2020-survey3.2$year_elected
survey3.2$years_6over<-ifelse(survey3.2$years_office>5,1,0)
survey3.2$times_called<-ifelse(survey3.2$times_called==0,1,survey3.2$times_called)
survey3.2$times_called<-ifelse(survey3.2$times_called==-2,2,survey3.2$times_called)
survey3.2$times_called_3over<-ifelse(survey3.2$times_called>2,1,0)

survey3.2$political_party<-ifelse(survey3.2$political_party=="Congree"|survey3.2$political_party=="Congress"|
                                    survey3.2$political_party=="INC"|survey3.2$political_party=="Inc","INC", survey3.2$political_party)
survey3.2$political_party<-ifelse(survey3.2$political_party=="Bjp"|survey3.2$political_party=="BJP"|survey3.2$political_party=="B j p","BJP",survey3.2$political_party)
survey3.2$political_party<-ifelse(survey3.2$political_party=="Bjp"|survey3.2$political_party=="BJP"|survey3.2$political_party=="B j p","BJP",survey3.2$political_party)
survey3.2$political_party<-ifelse(survey3.2$political_party=="Cpi"|survey3.2$political_party=="Cpi(m)"|survey3.2$political_party=="Cpim"|survey3.2$political_party=="CPIM", "CPI", survey3.2$political_party)
survey3.2$political_party<-ifelse(survey3.2$political_party=="Iuml", "IUML", survey3.2$political_party)
survey3.2$political_party<-ifelse(survey3.2$political_party=="Bnp", "BNP", survey3.2$political_party)
survey3.2$political_party<-ifelse(survey3.2$political_party=="Jss", "JSS", survey3.2$political_party)
survey3.2$political_party<-ifelse(survey3.2$political_party=="Rsp", "RSP", survey3.2$political_party)
survey3.2$political_party<-ifelse(survey3.2$political_party=="Nirdaliya", "Independent", survey3.2$political_party)
survey3.2$political_party<-ifelse(survey3.2$political_party=="Samajvadi party", "SP", survey3.2$political_party)
survey3.2$political_party<-ifelse(survey3.2$political_party=="Tmc", "TMC", survey3.2$political_party)



#Affective status classification
###Affective Status
library(tidyLPA)
library(tidyverse)
library(psych)

survey3.2<-survey3.2[!is.na(survey3.2$enthusiastic),]
model1.5data<-survey3.2

#VSS and bi-plot
emotions<-as.matrix(cbind(survey3.2$enthusiastic,survey3.2$angry,survey3.2$hopeful,survey3.2$resentful))

#Shows that fit "peak" is at 2 factors
vss(emotions,fm="minres", rotate="oblimin")
#Should retain two factors (e.g., pleasant and unpleasant)
fa.parallel(emotions, fm="minres")

#Run the factor analysis in 7 different ways
factors.mr_r<-fa(emotions, 2, fm="minres", rotate ="oblimin", scores="regression")
factors.ml_r<-fa(emotions, 2, fm="ml", rotate ="oblimin", scores="regression")
factors.mr_b<-fa(emotions, 2, fm="minres", rotate ="oblimin", scores="Bartlett") ##This is our baseline
factors.ml_b<-fa(emotions, 2, fm="ml", rotate ="oblimin", scores="Bartlett")
factors.pa_r<-fa(emotions, 2, fm="pa", rotate ="oblimin", scores="regression")
factors.pa_b<-fa(emotions, 2, fm="pa", rotate ="oblimin", scores="Bartlett")
factors.pca<-principal(emotions, 2, rotate ="oblimin", scores=T)

#All these factor analyses say the same thing, essentially
par(mfrow=c(2,4))
biplot(factors.mr_b, col=c("grey", "blue"),
       main="Minres Bartlett") #Main plot
biplot(factors.mr_r, col=c("grey", "blue"),
       main="Minres Regression")
biplot(factors.ml_r, col=c("grey", "blue"),
       main="Maxlik Regression")
biplot(factors.ml_b, col=c("grey", "blue"),
       main="Maxlik Bartlett")
biplot(factors.pa_r, col=c("grey", "blue"),
       main="Principal Axis Regression")
biplot(factors.pa_b, col=c("grey", "blue"),
       main="Principal Axis Bartlett")
biplot(factors.pca, col=c("grey", "blue"),
       main="PCA")
dev.off()

##Add factor scores
#Minres-regression
factors.mr_r.df <- as.data.frame(factors.mr_r$scores)
factors.mr_r.df$upfact <- as.numeric(factors.mr_r.df$MR1)
factors.mr_r.df$pfact <- as.numeric(factors.mr_r.df$MR2)
model1.5data$upfact_mr_r <- factors.mr_r.df$upfact
model1.5data$pfact_mr_r <- factors.mr_r.df$pfact
##To check if we assinged the correct factors:
cor(model1.5data$pfact_mr_r,model1.5data$hopeful, use="complete.obs")
cor(model1.5data$upfact_mr_r,model1.5data$resentful, use="complete.obs")

#maxlik-regression
factors_ml_r.df <- as.data.frame(factors.ml_r$scores)
factors_ml_r.df$upfact <- as.numeric(factors_ml_r.df$ML1)
factors_ml_r.df$pfact <- as.numeric(factors_ml_r.df$ML2)
model1.5data$upfact_ml_r <- factors_ml_r.df$upfact
model1.5data$pfact_ml_r <- factors_ml_r.df$pfact
##To check if we assinged the correct factors:
cor(model1.5data$pfact_ml_r,model1.5data$hopeful, use="complete.obs")
cor(model1.5data$upfact_ml_r,model1.5data$resentful, use="complete.obs")

#Minres-bartlett
factors_mr_b.df <- as.data.frame(factors.mr_b$scores)
factors_mr_b.df$upfact <- as.numeric(factors_mr_b.df$MR1)
factors_mr_b.df$pfact <- as.numeric(factors_mr_b.df$MR2)
model1.5data$upfact_mr_b <- factors_mr_b.df$upfact
model1.5data$pfact_mr_b <- factors_mr_b.df$pfact
##To check if we assinged the correct factors:
cor(model1.5data$pfact_mr_b,model1.5data$hopeful, use="complete.obs")
cor(model1.5data$upfact_mr_b,model1.5data$resentful, use="complete.obs")

#maxlik-bartlett
factors_ml_b.df <- as.data.frame(factors.ml_b$scores)
factors_ml_b.df$upfact <- as.numeric(factors_ml_b.df$ML1)
factors_ml_b.df$pfact <- as.numeric(factors_ml_b.df$ML2)
model1.5data$upfact_ml_b <- factors_ml_b.df$upfact
model1.5data$pfact_ml_b <- factors_ml_b.df$pfact
##To check if we assinged the correct factors:
cor(model1.5data$pfact_ml_b,model1.5data$hopeful, use="complete.obs")
cor(model1.5data$upfact_ml_b,model1.5data$resentful, use="complete.obs")

#princ access-regression
factors_pa_r.df <- as.data.frame(factors.pa_r$scores)
factors_pa_r.df$upfact <- as.numeric(factors_pa_r.df$PA1)
factors_pa_r.df$pfact <- as.numeric(factors_pa_r.df$PA2)
model1.5data$upfact_pa_r <- factors_pa_r.df$upfact
model1.5data$pfact_pa_r <- factors_pa_r.df$pfact
##To check if we assinged the correct factors:
cor(model1.5data$pfact_pa_r,model1.5data$hopeful, use="complete.obs")
cor(model1.5data$upfact_pa_r,model1.5data$resentful, use="complete.obs")

#princ access-bartlett
factors_pa_b.df <- as.data.frame(factors.pa_b$scores)
factors_pa_b.df$upfact <- as.numeric(factors_pa_b.df$PA1)
factors_pa_b.df$pfact <- as.numeric(factors_pa_b.df$PA2)
model1.5data$upfact_pa_b <- factors_pa_b.df$upfact
model1.5data$pfact_pa_b <- factors_pa_b.df$pfact
##To check if we assinged the correct factors:
cor(model1.5data$pfact_pa_b,model1.5data$hopeful, use="complete.obs")
cor(model1.5data$upfact_pa_b,model1.5data$resentful, use="complete.obs")

#pca
factors_pca.df <- as.data.frame(factors.pca$scores)
factors_pca.df$upfact <- as.numeric(factors_pca.df$TC1)
factors_pca.df$pfact <- as.numeric(factors_pca.df$TC2)
model1.5data$upfact_pca <- factors_pca.df$upfact
model1.5data$pfact_pca <- factors_pca.df$pfact
##To check if we assinged the correct factors:
cor(model1.5data$pfact_pca,model1.5data$hopeful, use="complete.obs")
cor(model1.5data$upfact_pca,model1.5data$resentful, use="complete.obs")

#Correlation between all the factors
library(xtable)
factors_all <- model1.5data %>%
  dplyr::select(pfact_mr_r,pfact_mr_b,pfact_ml_r,pfact_ml_b,pfact_pa_r,
         pfact_pa_b,pfact_pca,upfact_mr_r,upfact_mr_b,
         upfact_ml_r,upfact_ml_b,upfact_pa_r,upfact_pa_b,
         upfact_pca)
factors_corr <- cor(factors_all, use="complete.obs")
factors_corr
xtable(factors_corr[,1:7], type="latex")
xtable(factors_corr[,8:14], type="latex")

##To generate the affect variable for each method:  
#Minres-regression
model1.5data$affect_mr_r <- NA
model1.5data$affect_mr_r <- ifelse((model1.5data$pfact_mr_r>0 & model1.5data$upfact_mr_r<0),
                                   "Pleasant",ifelse((model1.5data$pfact_mr_r<0 & 
                                                        model1.5data$upfact_mr_r<0),
                                                     "Weak",ifelse((model1.5data$pfact_mr_r>0 
                                                                    & model1.5data$upfact_mr_r>0),
                                                                   "Mixed","Unpleasant")))
model1.5data$affect_mr_r<- factor(model1.5data$affect_mr_r , ordered = FALSE )
model1.5data$affect_mr_r <- relevel(model1.5data$affect_mr_r, ref="Weak")
table(model1.5data$affect_mr_r)

#maxlik-regression
model1.5data$affect_ml_r <- NA
model1.5data$affect_ml_r <- ifelse((model1.5data$pfact_ml_r>0 & 
                                      model1.5data$upfact_ml_r<0),
                                   "Pleasant",ifelse((model1.5data$pfact_ml_r<0 & 
                                                        model1.5data$upfact_ml_r<0),
                                                     "Weak",ifelse((model1.5data$pfact_ml_r>0 & 
                                                                      model1.5data$upfact_ml_r>0),
                                                                   "Mixed","Unpleasant")))
model1.5data$affect_ml_r<- factor(model1.5data$affect_ml_r , ordered = FALSE )
model1.5data$affect_ml_r <- relevel(model1.5data$affect_ml_r, ref="Weak")
table(model1.5data$affect_ml_r)

#Minres-bartlett
model1.5data$affect_mr_b <- NA
model1.5data$affect_mr_b <- ifelse((model1.5data$pfact_mr_b>0 & 
                                      model1.5data$upfact_mr_b<0),
                                   "Pleasant",ifelse((model1.5data$pfact_mr_b<0 &
                                                        model1.5data$upfact_mr_b<0),
                                                     "Weak",ifelse((model1.5data$pfact_mr_b>0 & 
                                                                      model1.5data$upfact_mr_b>0),
                                                                   "Mixed","Unpleasant")))
model1.5data$affect_mr_b<- factor(model1.5data$affect_mr_b , ordered = FALSE )
model1.5data$affect_mr_b <- relevel(model1.5data$affect_mr_b, ref="Weak")
table(model1.5data$affect_mr_b)

#maxlik-bartlett
model1.5data$affect_ml_b <- NA
model1.5data$affect_ml_b <- ifelse((model1.5data$pfact_ml_b>0 & 
                                      model1.5data$upfact_ml_b<0),
                                   "Pleasant",ifelse((model1.5data$pfact_ml_b<0 &
                                                        model1.5data$upfact_ml_b<0),
                                                     "Weak",ifelse((model1.5data$pfact_ml_b>0 & 
                                                                      model1.5data$upfact_ml_b>0),
                                                                   "Mixed","Unpleasant")))
model1.5data$affect_ml_b<- factor(model1.5data$affect_ml_b , ordered = FALSE )
model1.5data$affect_ml_b <- relevel(model1.5data$affect_ml_b, ref="Weak")
table(model1.5data$affect_ml_b)

#princ access-regression
model1.5data$affect_pa_r <- NA
model1.5data$affect_pa_r <- ifelse((model1.5data$pfact_pa_r>0 & 
                                      model1.5data$upfact_pa_r<0),
                                   "Pleasant",ifelse((model1.5data$pfact_pa_r<0 &
                                                        model1.5data$upfact_pa_r<0),
                                                     "Weak",ifelse((model1.5data$pfact_pa_r>0 &
                                                                      model1.5data$upfact_pa_r>0),
                                                                   "Mixed","Unpleasant")))
model1.5data$affect_pa_r<- factor(model1.5data$affect_pa_r , ordered = FALSE )
model1.5data$affect_pa_r <- relevel(model1.5data$affect_pa_r, ref="Weak")
table(model1.5data$affect_pa_r)

#princ access-bartlett
model1.5data$affect_pa_b <- NA
model1.5data$affect_pa_b <- ifelse((model1.5data$pfact_pa_b>0 & 
                                      model1.5data$upfact_pa_b<0),
                                   "Pleasant",ifelse((model1.5data$pfact_pa_b<0 &
                                                        model1.5data$upfact_pa_b<0),
                                                     "Weak",ifelse((model1.5data$pfact_pa_b>0 &
                                                                      model1.5data$upfact_pa_b>0),
                                                                   "Mixed","Unpleasant")))
model1.5data$affect_pa_b<- factor(model1.5data$affect_pa_b , ordered = FALSE )
model1.5data$affect_pa_b <- relevel(model1.5data$affect_pa_b, ref="Weak")
table(model1.5data$affect_pa_b)

#pca
model1.5data$affect_pca <- NA
model1.5data$affect_pca <- ifelse((model1.5data$pfact_pca>0 &
                                     model1.5data$upfact_pca<0),
                                  "Pleasant",ifelse((model1.5data$pfact_pca<0 &
                                                       model1.5data$upfact_pca<0),
                                                    "Weak",ifelse((model1.5data$pfact_pca>0 &
                                                                     model1.5data$upfact_pca>0),
                                                                  "Mixed","Unpleasant")))
model1.5data$affect_pca<- factor(model1.5data$affect_pca , ordered = FALSE )
model1.5data$affect_pca <- relevel(model1.5data$affect_pca, ref="Weak")
table(model1.5data$affect_pca)



###Table SI.4.1
##To calculate Cramer's V for the "correlation" between these different cateogorical variables
library(vcd)
catcorrm <- function(vars, dat) sapply(vars, function(y) sapply(vars, function(x) assocstats(table(dat[,x], dat[,y]))$cramer))

factors_affcor <- catcorrm(c("affect_mr_r","affect_mr_b","affect_ml_r","affect_ml_b",
                             "affect_pa_r","affect_pa_b","affect_pca"), model1.5data)
factors_affcor
xtable(factors_affcor, type='latex')

###To generate both the assignment plot and the sensitivity plot for UK1, and then to plot them together (Figure 4 in the new paper):

##The Quadraplex Assignment plot:
factors_affectplot <- model1.5data %>%
  as_tibble() %>%
  mutate(cluster = affect_mr_b) %>%
  ggplot(aes(upfact_mr_b, pfact_mr_b, color = affect_mr_b)) + geom_jitter(width = 0.25, height = 0.25) + theme_bw() + 
  guides(color=guide_legend(title="")) + ggtitle("Minres Bartlett Affective States")+
  theme(plot.title = element_text(hjust = 0.5))+
  xlab("Unpleasant Valence") + ylab("Pleasant Valence") + theme(axis.title=element_text(size=12)) + 
  theme(axis.text=element_text(size=12)) + theme(legend.text=element_text(size=12)) + 
  theme(legend.title=element_text(size=12)) + geom_hline(yintercept = 0, lty=4) + 
  geom_vline(xintercept = 0, lty=2) + scale_color_manual(values=c("#4daf4a", "#984ea3", "#377eb8", "#e41a1c"), 
                                                         labels = c("Weak","Mixed","Pleasant","Unpleasant"))
factors_affectplot

##Sensitivity Plot:
library(tidyverse)
factors_affect.df <- model1.5data %>%
  dplyr::select(affect_mr_r,affect_mr_b,affect_ml_r,affect_ml_b,affect_pa_r,affect_pa_b,affect_pca)
rownames_to_column(factors_affect.df, var="rownames")
factors_affect.df$affectsum <- NA
factors_affect.df

affect.f <- function(i, col) {
  if (factors_affect.df[i, "affect_mr_b"] == factors_affect.df[i, col]) {
    return(1)
  } else {
    return(0)
  }
}

affect.sum <- function(i) {
  return (affect.f(i,"affect_mr_b") 
          + affect.f(i,"affect_mr_r")
          + affect.f(i,"affect_ml_b")
          + affect.f(i,"affect_ml_r")
          + affect.f(i,"affect_pa_r")
          + affect.f(i,"affect_pa_b")
          + affect.f(i,"affect_pca"))
}

for(i in 1:nrow(factors_affect.df)) {
  factors_affect.df[i,]$affectsum = affect.sum(i)
}
factors_affect.df

model1.5data$affectsum <- factors_affect.df$affectsum
table(model1.5data$affectsum)

model1.5data$affectperc <- model1.5data$affectsum/7

factors_faplot <- model1.5data %>%
  as_tibble() %>%
  mutate(cluster = affect_mr_b) %>%
  ggplot(aes(upfact_mr_b, pfact_mr_b, color = factor(affectsum))) + 
  geom_jitter(width = 0.25, height = 0.25) + theme_bw() +  ggtitle("Number of FA Methods with Same Affective State Classification")+
  theme(plot.title = element_text(hjust = 0.5))+
  guides(color=guide_legend(title="")) + 
  xlab("Unpleasant Valence") + ylab("Pleasant Valence") + 
  theme(axis.title=element_text(size=12)) + theme(axis.text=element_text(size=12)) + 
  theme(legend.text=element_text(size=12)) + theme(legend.title=element_text(size=12)) + 
  geom_hline(yintercept = 0, lty=2) + geom_vline(xintercept = 0, lty=2) 
#+ scale_color_manual(values=c("#FFFF33", "#FFFF33", "#FFFF00", "#FFCC33", "#FFCC00", "#66CC00"))
factors_faplot

data4<-model1.5data

data4$affect_mr_b2_p<-ifelse(data4$affect_mr_b=="Pleasant",1,0)
data4$affect_mr_b2_u<-ifelse(data4$affect_mr_b=="Unpleasant",1,0)
data4$affect_mr_b2_m<-ifelse(data4$affect_mr_b=="Mixed",1,0)
data4$affect_mr_b2_w<-ifelse(data4$affect_mr_b=="Weak",1,0)

data4$affect_mr_r2_p<-ifelse(data4$affect_mr_r=="Pleasant",1,NA)
data4$affect_mr_r2_u<-ifelse(data4$affect_mr_r=="Unpleasant",1,0)
data4$affect_mr_r2_m<-ifelse(data4$affect_mr_r=="Mixed",1,0)
data4$affect_mr_r2_w<-ifelse(data4$affect_mr_r=="Weak",1,0)

data4$affect_ml_b2_p<-ifelse(data4$affect_ml_b=="Pleasant",1,0)
data4$affect_ml_b2_u<-ifelse(data4$affect_ml_b=="Unpleasant",1,0)
data4$affect_ml_b2_m<-ifelse(data4$affect_ml_b=="Mixed",1,0)
data4$affect_ml_b2_w<-ifelse(data4$affect_ml_b=="Weak",1,0)

data4$affect_ml_r2_p<-ifelse(data4$affect_ml_r=="Pleasant",1,0)
data4$affect_ml_r2_u<-ifelse(data4$affect_ml_r=="Unpleasant",1,0)
data4$affect_ml_r2_m<-ifelse(data4$affect_ml_r=="Mixed",1,0)
data4$affect_ml_r2_w<-ifelse(data4$affect_ml_r=="Weak",1,0)

data4$affect_pa_r2_p<-ifelse(data4$affect_pa_r=="Pleasant",1,0)
data4$affect_pa_r2_u<-ifelse(data4$affect_pa_r=="Unpleasant",1,0)
data4$affect_pa_r2_m<-ifelse(data4$affect_pa_r=="Mixed",1,0)
data4$affect_pa_r2_w<-ifelse(data4$affect_pa_r=="Weak",1,0)

data4$affect_pa_b2_p<-ifelse(data4$affect_pa_b=="Pleasant",1,0)
data4$affect_pa_b2_u<-ifelse(data4$affect_pa_b=="Unpleasant",1,0)
data4$affect_pa_b2_m<-ifelse(data4$affect_pa_b=="Mixed",1,0)
data4$affect_pa_b2_w<-ifelse(data4$affect_pa_b=="Weak",1,0)

data4$affect_pca2_p<-ifelse(data4$affect_pca=="Pleasant",1,0)
data4$affect_pca2_u<-ifelse(data4$affect_pca=="Unpleasant",1,0)
data4$affect_pca2_m<-ifelse(data4$affect_pca=="Mixed",1,0)
data4$affect_pca2_w<-ifelse(data4$affect_pca=="Weak",1,0)



survey3.2<-data4



###Survey final prep
survey3.2$cmte_spend_wiselyf<-as.factor(survey3.2$cmte_spend_wisely)
survey3.2$cmte_opinionsf<-as.factor(survey3.2$cmte_opinions)
survey3.2$cmte_validconcernsf<-as.factor(survey3.2$cmte_validconcerns)
survey3.2$cmte_onegroupf<-as.factor(survey3.2$cmte_onegroup)
survey3.2$cmte_castetrustf<-as.factor(survey3.2$cmte_castetrust)
survey3.2$neighbor_comfortf<-as.factor(survey3.2$neighbor_comfort)
survey3.2$talk_comfortf<-as.factor(survey3.2$talk_comfort)
survey3.2$donation_castef<-as.factor(survey3.2$donation_caste)
survey3.2$affect_mr_b2_pf<-as.factor(survey3.2$affect_mr_b2_p)
survey3.2$affect_mr_b2_uf<-as.factor(survey3.2$affect_mr_b2_u)
survey3.2$affect_mr_b2_mf<-as.factor(survey3.2$affect_mr_b2_m)
survey3.2$affect_mr_b2_wf<-as.factor(survey3.2$affect_mr_b2_w)


survey3.2$cmte_spend_wisely_n<-(survey3.2$cmte_spend_wisely-1)/4
survey3.2$cmte_opinions_n<-(survey3.2$cmte_opinions-1)/4
survey3.2$cmte_validconcerns_n<-(survey3.2$cmte_validconcerns-1)/4
survey3.2$cmte_onegroup_n<-(survey3.2$cmte_onegroup-1)/4
survey3.2$cmte_castetrust_n<-(survey3.2$cmte_castetrust-1)/4
survey3.2$neighbor_comfort_n<-(survey3.2$neighbor_comfort-1)/4
survey3.2$talk_comfort_n<-(survey3.2$talk_comfort-1)/4


#save(survey3.2, file="survey3.2.RData")















